capture log close
log using "Y:/Output", replace text

*-----------------------------------------------------------------
* Purpose of this do-file
*   - create individual level variables from NEPS
*   - create major level variables from NEPS
*   - merge both into one integrated dataset for analysis
* Project: "A replication of Ochsenfeld (2016), Patzina & Toussaint"
*-----------------------------------------------------------------

version 13
set more off
clear

* Global path for importing the NEPS-SC5 datasets
global data "Z:\SUF\Remote\SC5\SC5_R_4-0-0\Stata14"

* Global path for reading in do-files
global file "Y:\"

* Global path for saving datasets and logfiles; it also becomes the
* working directory
global project "Z:\Projects\p000165_DUA_3627\Gender Segregation\"
cd "${project}"


*******************************
*** major level variables ***
*******************************

/*** results matrix
	1 - major id
	2 - major size
	3 - math intensity (mean)
	4 - math intensity (se)
	5 - diff: approval from parents (for women) - "" (for men)
	6 - diff: approval from peers (for women) - "" (for men)
	7 - women: approval from parents (mean)
	8 - women: approval from parents (se)
	9 - men: approval from parents (mean)
	10 - men: approval from parents (se)
	11 - female (mean)
	12 - female (se)
	13 - women: approval from peers (mean)
	14 - women: approval from peers (se)
	15 - men: approval from peers (mean)
	16 - men: approval from peers (se)
	17 - diff: approval from parents (se)
	18 - diff: approval from peers (se)
	19 - care major (0/1)
***/

*** compute approval rates and proportion female from neps ***
**************************************************************

*** wave-1 CATI interview data, combined with the wave-1 weights ***
use "${data}\SC5_pTargetCATI_R_4-0-0.dta", clear
keep ID_t-inty tg04001_g1R tg04004_g1R t700001 t70000y t30240a tg15003 tg15004 tg02001
keep if wave==1
save "${project}\data_fach_w1_w7.dta", replace

use "${data}\SC5_Weights_R_4-0-0.dta", clear
keep ID_t w_t1_std
* no filter on _merge here: rows found in only one of the two files stay
merge 1:1 ID_t using "${project}\data_fach_w1_w7.dta"
drop _merge

* homogenize variable names with the HIS data
recode tg04001_g1R (-97=.), gen(B1ber1ab1)
recode tg04004_g1R (-97 -95 -93 -54=.), gen(B1ber2ab1)

* students on the teacher track form their own major (code 80)
generate lehramt = 1 if inlist(tg02001, 1, 2)
replace  lehramt = 0 if inlist(tg02001, 3, 4, 5, 6, 7, 8)
replace  B1ber1ab1 = 80 if lehramt==1

* merge small but similar majors
* (no target code is itself a source code, so one recode call suffices)
recode B1ber1ab1 ///
	(3 = 2)                        /// cath. theology --> prot. theology
	(4 = 5)                        /// philosophy --> history
	(1 7 8 9 10 11 12 13 = 14)     /// non-german philologies and cultural sciences --> cultural sciences
	(17 = 16)                      /// special needs pedagogy --> pedagogy
	(23 25 = 26)                   /// social and economic studies, political science --> social sciences
	(62 = 43)                      /// mining and metallurgy --> geosciences
	(67 57 = 44)                   /// urban and regional planning --> geography
	(50 = 49)                      /// dentistry --> medical science
	(59 = 58)                      /// forestry --> agronomy
	(76 = 66)                      /// design --> architecture and interior design
	(65 69 = 68)                   /// surveying and mapping, traffic engineering --> civil engineering
	(75 77 78 = 74)                /// fine arts, performing arts, music, musicology --> art, art history
	(70 = 31)                      // industrial engineering(ing) --> industrial engineering(bwl)
label value B1ber1ab1 de2543ext1

*** define sample ***
gen sample = 1
gen alter = 2011 - t70000y
recode t700001 (-97=.) (1=0) (2=1), gen(frau)
* out of sample: older than 25 when starting the degree, or gender unknown
replace sample = 0 if alter>25 | frau==.
keep if sample==1

* only fields with at least 100 observations in the 2nd wave (below);
* the list of major codes is hard-coded here
keep if inlist(B1ber1ab1, 5, 14, 15, 16, 26, 27, 28, 30, 31, 37, 38, 39, 40, 42, 44, 49, 58, 63, 64, 66, 68, 74, 80)

* results matrix: one row per major, 19 columns (layout documented at the
* top of this section); one 0/1 dummy per major
quietly tabulate B1ber1ab1, generate(fachdum)
global fachnr = r(r)
label values fachdum1-fachdum$fachnr de2543ext1
matrix fneps = J($fachnr, 19, .)

* approval items: answers 4 and 5 count as approval (1), answers 1-3 as 0
recode tg15003 (-98 -97=.) (1 2 3=0) (4 5=1), gen(eltap)
recode tg15004 (-98 -97 -52=.) (1 2 3=0) (4 5=1), gen(peerap)
* sample is set to 0 here, but the estimation commands below do not
* condition on it
replace sample=0 if (eltap==. | peerap==.)
svyset ID_t[pw=w_t1_std]


* plug estimates into results matrix
quietly forvalues i = 1/$fachnr {
	* column 1: major code (constant within the dummy, so the mean is the code)
	summarize B1ber1ab1 if (fachdum`i'==1)
	matrix fneps[`i', 1] = r(mean)

	* parents (eltap): women 7/8, men 9/10, diff 5, se of diff 17
	* peers  (peerap): women 13/14, men 15/16, diff 6, se of diff 18
	local k 0
	foreach item in eltap peerap {
		local cw  = 7 + 6*`k'
		local cws = `cw' + 1
		local cm  = `cw' + 2
		local cms = `cw' + 3
		local cd  = 5 + `k'
		local cds = 17 + `k'

		svy: mean `item' if (fachdum`i'==1 & frau==1)
		matrix fneps[`i', `cw']  = _b[`item']
		matrix fneps[`i', `cws'] = _se[`item']

		svy: mean `item' if (fachdum`i'==1 & frau==0)
		matrix fneps[`i', `cm']  = _b[`item']
		matrix fneps[`i', `cms'] = _se[`item']

		* >0 if women get more approval than men
		matrix fneps[`i', `cd'] = (fneps[`i', `cw'] - fneps[`i', `cm'])

		* standard error of the women-minus-men difference
		svy: mean `item' if (fachdum`i'==1), over(frau)
		lincom [`item']1 - [`item']0
		matrix fneps[`i', `cds'] = r(se)

		local ++k
	}

	* columns 11/12: proportion female and its standard error
	svy: mean frau if (fachdum`i'==1)
	matrix fneps[`i', 11] = _b[frau]
	matrix fneps[`i', 12] = _se[frau]
}
matrix list fneps
	
*** mathint (item from wave2, therefore computed from smaller sample) ***
*************************************************************************

*** create dataset from multiple panel waves
* CATI - pTarget (wave 1)
use "${data}\SC5_pTargetCATI_R_4-0-0.dta", clear
keep ID_t-inty tg04001_g1R tg04004_g1R t700001 t70000y t30240a tg15003 tg15004 tg02001
keep if wave==1
save "${project}\data_fach_w2_w7.dta", replace

* CAWI - pTarget (wave 2); only persons present in both waves are kept
use "${data}\SC5_pTargetCAWI_R_4-0-0.dta", clear
keep ID_t-tg51000 tg51300 tg51203 tg51311_g1R t291521
keep if wave==2
merge 1:1 ID_t using "${project}\data_fach_w2_w7.dta"
keep if _merge==3
drop _merge
save "${project}\data_fach_w2_w7.dta", replace

* weights (for waves 1 and 2)
use "${data}\SC5_Weights_R_4-0-0.dta", clear
keep ID_t w_t12_std
merge 1:1 ID_t using "${project}\data_fach_w2_w7.dta"
keep if _merge==3
drop _merge

*** merge small but similar majors (as in census and his surveys)
recode tg04001_g1R (-97=.), gen(B1ber1ab1)
recode tg04004_g1R (-97 -95 -93 -54=.), gen(B1ber2ab1)

* students on the teacher track form their own major (code 80)
generate lehramt = 1 if inlist(tg02001, 1, 2)
replace  lehramt = 0 if inlist(tg02001, 3, 4, 5, 6, 7, 8)
replace  B1ber1ab1 = 80 if lehramt==1

* no target code is itself a source code, so one recode call suffices
recode B1ber1ab1 ///
	(3 = 2)                        /// cath. theology --> prot. theology
	(4 = 5)                        /// philosophy --> history
	(1 7 8 9 10 11 12 13 = 14)     /// non-german philologies and cultural sciences --> cultural sciences
	(17 = 16)                      /// special needs pedagogy --> pedagogy
	(23 25 = 26)                   /// social and economic studies, political science --> social sciences
	(62 = 43)                      /// mining and metallurgy --> geosciences
	(67 57 = 44)                   /// urban and regional planning --> geography
	(50 = 49)                      /// dentistry --> medical science
	(59 = 58)                      /// forestry --> agronomy
	(76 = 66)                      /// design --> architecture and interior design
	(65 69 = 68)                   /// surveying and mapping, traffic engineering --> civil engineering
	(75 77 78 = 74)                /// fine arts, performing arts, music, musicology --> art, art history
	(70 = 31)                      // industrial engineering(ing) --> industrial engineering(bwl)
label value B1ber1ab1 de2543ext1
* "general engineering" does not exist in microcensus
drop if B1ber1ab1==61

* define sample
gen sample = 1
gen alter = 2011 - t70000y
recode t700001 (-97=.) (1=0) (2=1), gen(frau)
* out of sample: older than 25, gender unknown, or tg51000 different from 1
* (original note: only those who did not drop out)
replace sample = 0 if alter>25 | frau==. | tg51000!=1
drop if sample==0

* only fields with at least 100 observations:
* fsize holds the number of sample members in the respondent's major
* (only codes 1 to 80 are counted; other values keep fsize missing)
gen fsize = .
quietly forvalues code = 1/80 {
	count if B1ber1ab1==`code' & sample==1
	replace fsize = r(N) if B1ber1ab1==`code'
}

**************
replace sample = 0 if fsize<100
tab B1ber1ab1
keep if sample==1
**************

* generate math intensity index
* One 0/1 dummy per major that survived the wave-2 sample definition.
tab B1ber1ab1, gen(fachdum)
global fachnr = r(r)
lab val fachdum1-fachdum$fachnr de2543ext1

* t291521: answers 1..4 are shifted to 0..3 and then rescaled to 0..1
recode t291521 (-97 -91=.)(1=0)(2=1)(3=2)(4=3),gen(mathint1)
gen mathint=mathint1/3

* Guard: fneps was sized and filled (column 1 = major code) from the wave-1
* sample. The loop below writes into row i for the i-th wave-2 dummy, which
* is only valid if both samples contain the same majors in the same order.
* Stop with an error instead of silently attaching estimates to the wrong
* major.
if rowsof(fneps) != $fachnr {
	display as error "fneps has " rowsof(fneps) " rows but the wave-2 sample contains $fachnr majors"
	exit 459
}

svyset ID_t[pw=w_t12_std]
qui forvalues i = 1/$fachnr {
	* major code behind dummy i (constant within the dummy)
	summarize B1ber1ab1 if (fachdum`i'==1), meanonly
	if reldif(r(mean), fneps[`i', 1]) > 1e-6 {
		noisily display as error "row `i' of fneps holds major " fneps[`i', 1] ", wave-2 dummy `i' is major " r(mean)
		exit 459
	}
	* columns 2-4: number of observations, mean and s.e. of math intensity
	svy: mean mathint if (fachdum`i'==1 & sample==1)
		matrix fneps [`i', 2] = e(N)
		matrix fneps [`i', 3] = _b[mathint]
		matrix fneps [`i', 4] = _se[mathint]
	}

* reformat results: turn the results matrix into variables fneps1-fneps19
* and keep only the rows that carry a major
matrix list fneps
svmat fneps
keep fneps*
drop if fneps1==.

* variable labels, listed in column order 1..19
local lbls B1ber1ab1 obs_neps mathint mathint_se diff_app_eltern_fm diff_app_peers_fm app_eltern_f app_eltern_f_se app_eltern_m app_eltern_m_se propfemale_neps propfemale_se app_peers_f app_peers_f_se app_peers_m app_peers_m_se diff_app_eltern_fm_se diff_app_peers_fm_se care
local col 0
foreach txt of local lbls {
	local ++col
	label variable fneps`col' "`txt'"
}

* merge major characteristics with those created from census:
* B1ber1ab1 is the merge key
gen byte B1ber1ab1 = fneps1
sort B1ber1ab1
save "${project}\data_fach.dta", replace

* combine the census-based major statistics (fstats*) with the NEPS-based
* ones (fneps*); id_fach numbers the rows of the merged file
use "${file}\fstats.dta", clear
gen byte B1ber1ab1 = fstats1
sort B1ber1ab1
merge 1:1 B1ber1ab1 using "${project}\data_fach.dta"
drop _merge
gen id_fach = _n


* census-based measures
gen hrswork    = fstats7		/* 1 unit change=1 hour more */
gen hrswork_se = fstats8
gen intercpt    = fstats11*100	/* 1 unit change=1% wage more */
gen intercpt_se = fstats12*100

* NEPS-based measures, multiplied by 100 (1 unit change=1 perc point);
* each triple is: new name, fneps column of estimate, fneps column of s.e.
local spec mathint 3 4 apfmelt 5 17 apfmpeer 6 18 apfelt 7 8 apmelt 9 10 pfemale2 11 12 apfpeer 13 14 apmpeer 15 16
tokenize `spec'
while "`1'" != "" {
	gen `1'    = fneps`2'*100
	gen `1'_se = fneps`3'*100
	macro shift 3
}

* values for discrimination adopted from Ochsenfeld (2016): Online appendix
* table A7, generated from HIS graduate panel studies, 1997/2001 pooled.
* Pairs are: major code, value. Majors not listed keep the value 0.
gen fdiscrim = 0
local pairs 16 25.7 15 21.4 27 35.1 14 29.0 42 34.0 80 19.2 49 44.8 74 29.7 66 30.2 26 38.7 28 35.0 30 30.8 44 29.9 5 41.4 40 36.1 58 40.5 37 26.4 68 39.3 39 32.8 38 23.1 31 34.1 63 46.7 64 33.0
tokenize `pairs'
while "`1'" != "" {
	replace fdiscrim = `2' if fneps1==`1'
	macro shift 2
}



* code as care fields: psychology, pedagogy, social work, medical science, state teacher
gen care = 0
replace care = 100 if inlist(fneps1, 15, 16, 27, 49, 80)


* compute standard deviations and standardized variables for later use in effects model
* (the variable is divided by its sd, it is not centered)
foreach v of varlist pfemale2 hrswork intercpt mathint apfmelt apfmpeer care {
	summarize `v'
	gen `v'_sd  = r(sd)
	gen `v'_std = `v' / `v'_sd
}

* add variable labels
foreach v of varlist fstats8 fstats12 fneps4 fneps8 fneps10 fneps12 fneps14 fneps16 fneps17 fneps18 {
	label variable `v' "s.e."
}

save "${project}\data_fach_w7.dta", replace


*******************************************
*** individual characteristics: NEPS-SC4***
*******************************************

* Global path for importing the NEPS-SC4 datasets.
* From here on ${data} no longer points to the SC5 release.
global data "Z:\SUF\Remote\SC4\SC4_R_10-0-0\Stata14"


*** create dataset from multiple panel waves
* Every step loads one source file, reduces it to one row per ID_t, merges
* the file built so far and saves the result back to data_ind_w7.dta.

	* pTarget (wave 3)
	use "${data}\SC4_pTarget_R_10-0-0.dta", clear
	keep ID_t wave t66210d t66210h t66210c t44613a t436300
	keep if wave==3
	save "${project}\data_ind_w7.dta", replace

	* pTarget (wave 7)
	use "${data}\SC4_pTarget_R_10-0-0.dta", clear
	keep ID_t wave t66207*
	keep if wave==7
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	drop if _merge==2 /*drop cases without RIASEC information in wave 7*/
	drop _merge
	save "${project}\data_ind_w7.dta", replace

	* CohortProfile (wave 1)
	use "${data}\SC4_CohortProfile_R_10-0-0.dta", clear
	keep ID_t wave tx80501 tx8050y
	keep if wave==1
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3
	drop _merge
	save "${project}\data_ind_w7.dta", replace

	* spVocTrain
	use "${data}\SC4_spVocTrain_R_10-0-0.dta", clear
	keep ID_t wave spell ts15404_g1R ts15405_g1R ts15411
	tab ts15404_g1R
	/*drop respondents who do not pursue studies & cases without field of study information*/
	drop if inlist(ts15404_g1R, -54, -96, -98)
	* keep only information of first study program.
	* The sort is spelled out so that "first" no longer depends on the order
	* in which the file happens to be stored; stable keeps the file order
	* among rows that share the same ID_t and spell.
	sort ID_t spell, stable
	by ID_t: keep if _n==1
	drop spell
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3 /*drop cases without RIASEC information in wave 7 and respondents who do not pursue studies*/
	drop _merge
	save "${project}\data_ind_w7.dta", replace

	* pTarget (grades)
	* clear added: do not rely on the data in memory being unchanged since
	* the last save
	use "${data}\SC4_pTarget_R_10-0-0.dta", clear
	keep ID_t wave t724111 t724112 t724101 t724102 t724601 t724602
	* rows in which both sources of a grade are missing are dropped first
	drop if (t724111==. | t724111==-90 | t724111==-95) & (t724101==. | t724101==-98 | t724101==-95 | t724101==-90)
	drop if (t724112==. | t724112==-90 | t724112==-95) & (t724102==. | t724102==-98 | t724102==-95 | t724102==-90)
	recode t724111 (-90 -95 -54 -20=.)
	recode t724112 (-90 -95 -54 -20=.)
	recode t724101 (-90 -95 -98 -54 -20 -99=.)
	recode t724102 (-90 -95 -98 -54 -20 -99=.)
	* German / math grade: t7241x1 / t7241x2 first, t7241x0-series as fallback
	gen deut = t724111 if t724111~=.
	replace deut = t724101 if deut==.
	gen math = t724112 if t724112~=.
	replace math = t724102 if math==.
	recode t724601 (-54 -90 -95=.)
	recode t724602 (-54 -90 -95=.)
	drop if deut==. & t724601==. /*drop spells without information on German grade*/
	drop if math==. & t724602==. /*drop spells without information on Math grade*/
	* one row per person: the earliest wave that still has grade information
	sort ID_t wave, stable
	by ID_t: keep if _n==1
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3 /*drop cases without RIASEC information in wave 7 and respondents without information in grades*/
	drop _merge
	save "${project}\data_ind_w7.dta", replace

	* spSchool (fullabi)
	use "${data}\SC4_spSchool_R_10-0-0.dta", clear
	keep ID_t wave spell tf11211 ts11209
	keep if ts11209==5 & tf11211==2 /*keep only respondents with standard university entrance qualification (allg. HR)*/
	* one spell per individual, first spell in explicit order
	sort ID_t spell, stable
	by ID_t: keep if _n==1
	drop spell
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3 /*keep only cases with RIASEC information and standard university entrance qualification*/
	drop _merge
	save "${project}\data_ind_w7.dta", replace

	* weights (for wave 1)
	use "${data}\SC4_Weights_R_10-0-0.dta", clear
	keep ID_t w_t1
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3
	drop _merge
	order ID_t wave
	save "${project}\data_ind_w7.dta", replace

	* pscore (data_pscore.dta is not created in this do-file)
	use "${project}\data_pscore.dta", clear
	merge 1:1 ID_t using "${project}\data_ind_w7.dta"
	keep if _merge==3
	drop _merge
	order ID_t wave
	save "${project}\data_ind_w7.dta", replace
	

*gender: 1 = woman, 0 = man
recode tx80501 (-97 -54 -55=.)(1=0)(2=1), gen(frau)


*major (first and second subject)
recode ts15404_g1R(-98 -96 -54=.), gen(B1ber1ab1)
recode ts15405_g1R(-96 -54=.), gen(B1ber2ab1)


* math grades vs german grades
* convert grades into points: grade 1 -> 14, ..., grade 6 -> -1, which is
* then set to 0
gen math_punkte = ((-3*math)+17)
gen deut_punkte = ((-3*deut)+17)
recode math_punkte deut_punkte (-1=0)
gen dif_math_deut_pkt = (math_punkte - deut_punkte)
tab dif_math_deut_pkt
label variable dif_math_deut_pkt "relative math grade"


* RIASEC: each item is rescaled with (x - 1)/4
recode  t66207a_g1 t66207b_g1 t66207c_g1 t66207d_g1 t66207e_g1 t66207f_g1(-55 -54=.)
gen int_doer = (t66207a_g1 - 1)/4
gen int_thinker = (t66207b_g1 - 1)/4
gen int_creator = (t66207c_g1 - 1)/4
gen int_helper = (t66207d_g1 - 1)/4
gen int_persuader = (t66207e_g1 - 1)/4
gen int_organizer = (t66207f_g1 - 1)/4


* job values
numlabel de789, add
recode t66210d (-90 -95=.)
recode t66210h (-90 -95=.)
recode t66210c (-90 -95=.)
gen flexi_index = (t66210d - 1)/5 /*flexible working hours are important*/
gen bread_index = (t66210h - 1)/5 /*good remuneration is important*/
gen bread_index2 = (t66210c - 1)/5 /*career opportunities are important*/

/* "men and women should do same duties in household": recode so that it reflects DISagreement with the statement, i.e. reflects traditionalistic attitude */
* The recode reverses the answers 1..4 and at the same time moves them to 0..3.
recode t436300(4=0)(3=1)(2=2)(1=3)
recode t436300 (-90 -98=.)
* FIX: the item already starts at 0 after the recode above, so it is divided
* by 3 only. The former (t436300 - 1)/3 produced values from -1/3 to 2/3
* instead of 0 to 1.
gen bread_index3 = t436300/3
* NOTE(review): bread_index4 is built from the same item as bread_index3 and
* is therefore an exact copy. The original note for it reads "it's a man's
* job to earn money and a woman's to look after children", which suggests a
* different item was intended - TODO confirm.
gen bread_index4 = t436300/3


*identify students on teacher track; they form their own major (code 80)
generate lehramt = 1 if inlist(ts15411, 12, 14, 17)
replace  lehramt = 0 if inlist(ts15411, 7, 8, 9, 10, 11, 13, 15, 16, 18, 19, 29)
replace  B1ber1ab1 = 80 if lehramt==1

*merge small but similar majors
* (no target code is itself a source code, so one recode call suffices)
recode B1ber1ab1 ///
	(3 = 2)                        /// cath. theology --> prot. theology
	(4 = 5)                        /// philosophy --> history
	(1 7 8 9 10 11 12 13 = 14)     /// non-german philologies and cultural sciences --> cultural sciences
	(17 = 16)                      /// special needs pedagogy --> pedagogy
	(23 25 = 26)                   /// social and economic studies, political science --> social sciences
	(62 = 43)                      /// mining and metallurgy --> geosciences
	(67 57 = 44)                   /// urban and regional planning --> geography
	(50 = 49)                      /// dentistry --> medical science
	(59 = 58)                      /// forestry --> agronomy
	(76 = 66)                      /// design --> architecture and interior design
	(65 69 = 68)                   /// surveying and mapping, traffic engineering --> civil engineering
	(75 77 78 = 74)                /// fine arts, performing arts, music, musicology --> art, art history
	(70 = 31)                      // industrial engineering(ing) --> industrial engineering(bwl)
label value B1ber1ab1 de2543ext1


* define sample
*age
replace tx8050y =. if tx8050y < 0
*2014: first individuals enrolled in higher education
gen alter= 2014-tx8050y
tab alter, m
*no one older than 25 years--> no sample restriction needed
* (the former "gen sample = 1 ... drop sample" pair was never used and has
*  been removed; sample is created further below)

* drop persons from majors not in the choice set
gen byte inchoiceset = inlist(B1ber1ab1, 5, 14, 15, 16, 26, 27, 28, 30, 31, 37, 38, 39, 40, 42, 44, 49, 58, 63, 64, 66, 68, 74, 80)
tab inchoiceset
keep if inchoiceset==1
drop inchoiceset

* listwise deletion: sample==1 only if none of the listed variables is missing
* (int_helper used to be listed twice; each variable is now counted once,
*  which leaves the miss==0 condition unchanged)
gen sample=0
egen miss = rowmiss(frau B1ber1ab1 dif_math_deut_pkt int_doer int_thinker int_creator int_helper int_persuader int_organizer)
replace sample=1 if miss==0
tab sample

* supplementary analysis uses additional items and waves -> define separate sample.
gen sample_supl=0
egen miss_supl = rowmiss(frau B1ber1ab1 dif_math_deut_pkt bread_index bread_index2 bread_index3 bread_index4 flexi_index int_doer int_thinker int_creator int_helper int_persuader int_organizer)
replace sample_supl=1 if miss_supl==0
tab sample_supl

* 13 x 6 matrix; it is only declared here and not filled in this do-file.
* Row names now cover all 13 rows (10 names were given before); the three
* added names are the remaining variables of the standardisation loop below.
mat missatrand = J(13, 6, .)
mat colnames missatrand = mean_sample se_sample mean_drop se_drop diff wald
mat rownames missatrand = frau int_doer int_thinker int_creator int_helper int_persuader int_organizer dif_math_deut_pkt bread_index flexi_index bread_index2 bread_index3 bread_index4


keep if sample==1

* Compute standard deviations for all variables for later use in semi-standardized effects model
* The sd comes from the survey-weighted estimate (estat sd after svy: mean).
rename w_t1 w_t13_std
svyset ID_t[pw=w_t13_std]
foreach v of varlist frau int_doer int_thinker int_creator int_helper int_persuader int_organizer dif_math_deut_pkt flexi_index bread_index bread_index2 bread_index3 bread_index4 {
	svy: mean `v'
	estat sd
	matrix sdres = r(sd)
	gen `v'_sd = sdres[1,1]
	matrix drop sdres
	* standardize variable against sd for later use in effects model
	* (divided by the sd, not centered)
	gen `v'_std = `v' / `v'_sd
}

* running person number and number of persons, both used when the
* person x major grid is built
gen id_ind = _n
global indnr = _N
sort ID_t
save "${project}\data_ind_w7.dta", replace


*** create matched individual-major dataset ***
* One row per person x major combination: $indnr persons times $fachnr majors.
drop _all
set more off
local ncells = $fachnr*$indnr
set obs `ncells'
gen rownr = _n

* Rows (i-1)*$fachnr+1 to i*$fachnr belong to person i. The closed form
* gives the same ids as the former loop over all persons, without one
* full-data replace per person.
gen id_ind = ceil(rownr/$fachnr)
* rownr as secondary sort key makes the numbering of majors within a person
* independent of how rows with the same id_ind are ordered by the sort
bysort id_ind (rownr): gen id_fach = _n

* import information from the above two datasets
merge m:1 id_ind using "${project}\data_ind_w7.dta"
drop _merge
merge m:1 id_fach using "${project}\data_fach_w7.dta"
drop _merge

* generate dependent variable: 1 in the row of the major the person studies
gen choice = (B1ber1ab1==fneps1)


* generate interaction terms
* Naming: <major variable>x<individual variable>; the std_ prefix marks the
* product of the two standardized (_std) versions. Variables are created
* in the same order as before.
gen pfemale2xfrau = pfemale2*frau

rename dif_math_deut_pkt leist_md
rename dif_math_deut_pkt_std leist_md_std

* wage level x job values (bread_index, bread_index2)
foreach idx in bread_index bread_index2 {
	local tag : subinstr local idx "_index" ""
	gen intercptx`tag' = intercpt * `idx'
	gen std_intercptx`tag' = intercpt_std * `idx'_std
}

* wage level / working hours x bread_index3 and bread_index4, each also as a
* three-way term with frau (frau enters unstandardized in the std_ version)
foreach idx in bread_index3 bread_index4 {
	local tag : subinstr local idx "_index" ""
	foreach fld in intercpt hrswork {
		gen `fld'x`tag' = `fld' * `idx'
		gen std_`fld'x`tag' = `fld'_std * `idx'_std
		gen `fld'x`tag'xfrau = `fld' * `idx' * frau
		gen std_`fld'x`tag'xfrau = `fld'_std * `idx'_std * frau
	}
}

* math intensity x relative math grade
gen mathintxleist_md = mathint * leist_md
gen std_mathintxleist_md = mathint_std * leist_md_std

* working hours x importance of flexible hours
gen hrsworkxflexi = hrswork * flexi_index
gen std_hrsworkxflexi = hrswork_std * flexi_index_std

* approval differences x frau (here the std_ version uses frau_std)
gen apfmeltxfrau = apfmelt * frau
gen apfmpeerxfrau = apfmpeer * frau
gen std_apfmeltxfrau = apfmelt_std * frau_std
gen std_apfmpeerxfrau = apfmpeer_std * frau_std

* math intensity and care x the six RIASEC interests
foreach fld in mathint care {
	foreach trait in doer thinker creator helper persuader organizer {
		gen `fld'x`trait' = `fld' * int_`trait'
		gen std_`fld'x`trait' = `fld'_std * int_`trait'_std
	}
}

* discrimination x frau
gen fdiscrimxfrau = fdiscrim * frau


* generate major dummies (fach1, fach2, ...) for later use as FEs
tabulate id_fach, generate(fach)

* save dataset: identifiers first, rows ordered by rownr, helper and source
* variables removed
order rownr id_ind ID_t id_fach frau choice
sort rownr id_ind id_fach
drop B1ber1ab1 rownr ///
	ts11209 tf11211 t724111 t724112 t724101 t724102 t724601 t724602 ///
	deut math ts15404_g1R ts15405_g1R ts15411 tx80501 tx8050y fneps* fstats*
compress
save "${project}\data_choice_long_w7.dta", replace

*** END ***
log close
exit, clear

